library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readxl)
library(httr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
##
## config
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(RColorBrewer)
GET('http://www.africapolis.org/download/Africapolis_agglomeration_2015.xlsx',
write_disk(africapolis_impt <- tempfile(fileext = ".xls")))
## Response [http://www.africapolis.org/download/Africapolis_agglomeration_2015.xlsx]
## Date: 2019-04-02 19:41
## Status: 200
## Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
## Size: 1.65 MB
## <ON DISK> /var/folders/k5/7wfb52hs61v78z05j3k54rq80000gn/T//RtmpS74KH5/file3cb57d1a9fec.xls
# ggplot(afr_cities, aes(x = Population_2015, y = Builtup)) +
# geom_point()
hub_cities <- afr_cities %>%
drop_na(closest_metro) %>%
group_by(closest_metro) %>%
summarize(n = n()) %>%
arrange(desc(n)) %>%
head(5)
hub_cities
## # A tibble: 5 x 2
## closest_metro n
## <chr> <int>
## 1 Cairo 1063
## 2 Onitsha 424
## 3 Alger 383
## 4 Kano 377
## 5 Ibadan 277
# ggplot(test, aes(x = Population_2015, y = Builtup)) +
# theme_classic() +
# scale_y_continuous(labels = scales::comma) +
# scale_x_continuous(labels = scales::comma) +
# geom_point() +
# geom_smooth(method = 'lm', formula = y ~ splines::bs(x, 3), se = FALSE) +
# facet_grid(rows = vars(factor(closest_metro)))
color <- brewer.pal(4, "Set1")
# set up title formatting
title <- list(
text = "Relationship Between Job Prestige of R and Spouse",
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "left",
align = "left", x = 0, y = 1.2,
showarrow = FALSE
)
plots <- list()
idx <- 0
for (hub in hub_cities$closest_metro){
idx <- idx + 1
a <- list(
text = hub_cities$closest_metro[idx],
xref = "paper",
yref = "paper",
yanchor = "bottom",
xanchor = "center",
align = "center",
x = 0.5,
y = 1,
showarrow = FALSE
)
data <- afr_cities %>%
filter(closest_metro == hub) %>%
filter(dist_to_metro < 500) %>%
arrange(dist_to_metro)
#fit <- lm(Builtup ~ dist_to_metro, data = data)
fit <- lm(Population_2015 ~ dist_to_metro, data = data)
p <- plot_ly(data, x = ~dist_to_metro, y = ~Population_2015,
height = 800, width = 500) %>%
add_trace(type = "scatter", mode = "markers", hoverinfo = 'text',
text = paste("Name of town: ", data$Agglomeration_Name, "<br>",
"Altitude: ", data$Altitude),
marker = list(size = 3,
color = color[1],
opacity = .33),
showlegend = FALSE) %>%
add_trace(x = ~dist_to_metro, y = fitted(fit), mode = "lines",
name = "",
line = list(width = 2,
color = color[2])) %>%
layout(yaxis = list(title = "Population",
range = c(3, 8),
type = 'log',
exponentformat='E',
zeroline = FALSE),
xaxis = list(title = "Distance from Closest Metro",
zeroline = FALSE),
annotations = a) #%>%
plots <- c(plots, list(p))
}
subplot(plots, nrows = nrow(hub_cities), shareX = TRUE, shareY = TRUE, titleY = TRUE, titleX = TRUE) %>%
layout(annotations = title, showlegend = FALSE) %>%
config(collaborate = FALSE, displaylogo = FALSE, displayModeBar = FALSE)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
test <- afr_cities %>%
filter(closest_metro %in% hub_cities$closest_metro) %>%
filter(dist_to_metro > 0) %>%
filter(Population_2015 < 500000)
ggplot(test, aes(x = dist_to_metro, y = Population_2015)) +
theme_classic() +
scale_y_log10(labels = scales::comma) +
geom_point(color = "lightblue") +
geom_smooth(method = 'lm', formula = y ~ splines::bs(x, 3), se = FALSE, color = "steelblue") +
facet_grid(rows = vars(factor(closest_metro)))
#Categorical Variable From https://www.governing.com/gov-data/residential-racial-segregation-metro-areas.html
##Reading and formating the data
GET('http://images.centerdigitaled.com/documents/segregation-national-download.csv',
write_disk(us_cities_impt <- tempfile(fileext = ".csv")))
## Response [http://images.centerdigitaled.com/documents/segregation-national-download.csv]
## Date: 2019-04-02 19:41
## Status: 200
## Content-Type: application/csv
## Size: 28.3 kB
## <ON DISK> /var/folders/k5/7wfb52hs61v78z05j3k54rq80000gn/T//RtmpS74KH5/file3cb5fe90d5.csv
us_cities <- read.csv(us_cities_impt)
#Separating city and state
us_cities <- separate(data = us_cities, col = MSA, into = c("City", "State"), sep = ",")
#Cities are on the border of two states and will be removed
us_cities <- us_cities[!grepl("-",us_cities$State),]
#Keeping only columns of interest
us_states <- us_cities[,c("State", "Black.Population", "White.Population..Non.Hispanic.", "Hispanic.Population", "Asian.Population")]
us_states$Black.Population <- as.numeric(gsub(",","", as.character(us_states$Black.Population)))
us_states$White.Population..Non.Hispanic. <- as.numeric(gsub(",","", as.character(us_states$White.Population..Non.Hispanic.)))
us_states$Hispanic.Population <- as.numeric(gsub(",","", as.character(us_states$Hispanic.Population)))
us_states$Asian.Population <- as.numeric(gsub(",","", as.character(us_states$Asian.Population)))
#Grouping race populations by state
us_states <- aggregate(.~ State, us_states, FUN=sum)
#Calculate total sample population of each state
us_states$TotalPop <- apply(us_states[-1],1,sum)
#Convert counts to percentages
us_states$Black.Population <- round(us_states$Black.Population/us_states$TotalPop *100, 1)
us_states$White.Population..Non.Hispanic. <- round(us_states$White.Population..Non.Hispanic./us_states$TotalPop*100, 1)
us_states$Hispanic.Population <- round(us_states$Hispanic.Population/us_states$TotalPop*100, 1)
us_states$Asian.Population <- round(us_states$Asian.Population/us_states$TotalPop*100, 1)
##Plotting racial breakdown of states from population samples
color <- brewer.pal(4, "Set1")
plot_ly(us_states, x = ~State, y = ~Black.Population,
type = 'bar',
name = 'Black Population',
marker = list(color = color[1]),
text = paste("<b>State:</b>", us_states$State, "<br><b>Sample Population:</b>", prettyNum(us_states$TotalPop,big.mark=",",scientific=FALSE)),
hoverinfo = "text+y") %>%
add_trace(y = ~White.Population..Non.Hispanic.,
name = 'White Population',
marker=list(color = color[2])) %>%
add_trace(y = ~Hispanic.Population,
name = 'Hispanic Population',
marker=list(color = color[3])) %>%
add_trace(y = ~Asian.Population,
name = 'Asian Population',
marker=list(color = color[4])) %>%
layout(margin = list(t = 80),
barmode = 'stack',
annotations = list(text = "Racial Breakdown of Urban Areas in US States",
showarrow = FALSE,
font = list(size = 19),
x = 0.5,
xref = "paper",
xanchor = "center",
y = 1.2,
yref = "paper"),
yaxis = list(title = "Percent of Population",
ticksuffix = "%",
zeroline = FALSE)) %>%
add_annotations(text = paste0("Based on Data from 273 Cities in 43 States"),
showarrow = FALSE,
font = list(size = 16),
x = 0.5,
xref = "paper",
xanchor = "center",
y = 1.12,
yref = "paper")
##Looking at a sample of 5 random states Because there is data from 43 states, we decided to choose five random states to look at. Using this subset, we created a second chart below, which is easier to interpret.
set.seed(1)
index <- sample(1:nrow(us_states), 5)
us_sub <- us_states[index,]
plot_ly(us_sub, x = ~State, y = ~Black.Population,
type = 'bar',
name = 'Black Population',
marker = list(color = color[1]),
text = paste("<b>State:</b>", us_sub$State, "<br><b>Sample Population:</b>", prettyNum(us_sub$TotalPop,big.mark=",",scientific=FALSE)),
hoverinfo = "text+y") %>%
add_trace(y = ~White.Population..Non.Hispanic.,
name = 'White Population',
marker=list(color = color[2])) %>%
add_trace(y = ~Hispanic.Population,
name = 'Hispanic Population',
marker=list(color = color[3])) %>%
add_trace(y = ~Asian.Population,
name = 'Asian Population',
marker=list(color = color[4])) %>%
layout(margin = list(t = 80),
barmode = 'stack',
annotations = list(text = "Racial Breakdown of Urban Areas in Five US States",
showarrow = FALSE,
font = list(size = 19),
x = 0.5,
xref = "paper",
xanchor = "center",
y = 1.2,
yref = "paper"),
yaxis = list(title = "Percent of Population",
ticksuffix = "%",
zeroline = FALSE))
States <- c(' NY',' TX',' CA',' FL')
state_us_cities <- us_cities %>% filter(State %in% States)
head(state_us_cities)
## CBSA City State Black.Population
## 1 10180 Abilene TX 13,111
## 2 10580 Albany-Schenectady-Troy NY 64,242
## 3 11100 Amarillo TX 15,077
## 4 12420 Austin-Round Rock-Georgetown TX 138,478
## 5 12540 Bakersfield CA 45,024
## 6 13140 Beaumont-Port Arthur TX 95,980
## White.Population..Non.Hispanic. Hispanic.Population Asian.Population
## 1 110,168 38,892 2,704
## 2 709,495 42,999 37,424
## 3 159,073 72,974 8,283
## 4 1,057,470 643,813 108,635
## 5 310,638 458,907 39,808
## 6 223,131 58,674 10,474
## Black.White.Dissimilarity Hispanic.NonHispanic.Dissimilarity
## 1 0.420 0.344
## 2 0.609 0.351
## 3 0.603 0.387
## 4 0.491 0.377
## 5 0.530 0.463
## 6 0.650 0.386
## Asian.White.Dissimilarity Hispanic.NonHispanic.White..Dissimilarity
## 1 NA 0.379
## 2 0.467 0.413
## 3 NA 0.445
## 4 0.420 0.416
## 5 0.481 0.520
## 6 0.593 0.487
## Hispanic.White.NonHispanic.Black H.NH.Black
## 1 0.425 0.397
## 2 0.446 0.344
## 3 0.409 0.407
## 4 0.345 0.335
## 5 0.499 0.461
## 6 0.482 0.465
state_us_cities$Black.Population<-as.character(state_us_cities$Black.Population)
state_us_cities$Black.Population<-as.numeric(gsub(',','',state_us_cities$Black.Population))
state_us_cities$White.Population..Non.Hispanic.<-as.character(state_us_cities$White.Population..Non.Hispanic.)
state_us_cities$White.Population..Non.Hispanic.<-as.numeric(gsub(',','',state_us_cities$White.Population..Non.Hispanic.))
state_us_cities$Black.White.Dissimilarity<-as.numeric(state_us_cities$Black.White.Dissimilarity)
head(state_us_cities)
## CBSA City State Black.Population
## 1 10180 Abilene TX 13111
## 2 10580 Albany-Schenectady-Troy NY 64242
## 3 11100 Amarillo TX 15077
## 4 12420 Austin-Round Rock-Georgetown TX 138478
## 5 12540 Bakersfield CA 45024
## 6 13140 Beaumont-Port Arthur TX 95980
## White.Population..Non.Hispanic. Hispanic.Population Asian.Population
## 1 110168 38,892 2,704
## 2 709495 42,999 37,424
## 3 159073 72,974 8,283
## 4 1057470 643,813 108,635
## 5 310638 458,907 39,808
## 6 223131 58,674 10,474
## Black.White.Dissimilarity Hispanic.NonHispanic.Dissimilarity
## 1 0.420 0.344
## 2 0.609 0.351
## 3 0.603 0.387
## 4 0.491 0.377
## 5 0.530 0.463
## 6 0.650 0.386
## Asian.White.Dissimilarity Hispanic.NonHispanic.White..Dissimilarity
## 1 NA 0.379
## 2 0.467 0.413
## 3 NA 0.445
## 4 0.420 0.416
## 5 0.481 0.520
## 6 0.593 0.487
## Hispanic.White.NonHispanic.Black H.NH.Black
## 1 0.425 0.397
## 2 0.446 0.344
## 3 0.409 0.407
## 4 0.345 0.335
## 5 0.499 0.461
## 6 0.482 0.465
BL <- state_us_cities %>% group_by(State) %>%
summarise(BLpop = sum(Black.Population), WHpop = sum(White.Population..Non.Hispanic.),BWdis = mean(Black.White.Dissimilarity, na.rm = TRUE)*(BLpop+WHpop))
BL
## # A tibble: 4 x 4
## State BLpop WHpop BWdis
## <chr> <dbl> <dbl> <dbl>
## 1 " CA" 2150235 14168253 8479337.
## 2 " FL" 3012609 10443747 6719207.
## 3 " NY" 472360 4114731 2825648.
## 4 " TX" 2952078 9945298 6301980.
BL_pop <- BL$BLpop
BW_dis <- BL$BWdis
WHpop <- BL$WHpop
data <- data.frame(States,BL_pop,BW_dis)
p <- plot_ly(data,x=~States,y=~BL_pop,type='bar',name='BL Population') %>%
add_trace(y=~BW_dis,name='Black/White Disparity') %>%
add_trace(y=~WHpop,name='WH Population') %>%
layout(yaxis = list(title = 'Population'),title='Black/White Disparity', barmode='group')
p